# set directory
setwd("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code")

# load packages
library(dada2)
library("phyloseq")
library(ggplot2)
library(tidyverse)
library(fantaxtic)
library(DECIPHER)
library(phangorn)
library(reshape)
library(janitor)
library(multcomp)
library(MuMIn)
library(SciViews)
library(mblm)

# load phyloseq object and exploration type and guild data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/RA otu by guild and exploration type.RData")

# load phyloseq object and exploration type and guild data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/RA otu by guild and exploration type_new.RData")

# read in soil data
soil.data <- read.csv("soil.data.final.csv")

# add covariates to phyloseq object
sample_data(physeq)$organic.soil.C.N <- soil.data$organic.soil.C.N[1:96]
sample_data(physeq)$organic.soil.GWC <- soil.data$organic.soil.GWC[1:96]
sample_data(physeq)$carcass.deposition <- soil.data$carcass.deposition[1:96]
sample_data(physeq)$ph <- soil.data$ph[1:96]
sample_data(physeq)$decomposing.carcass <- soil.data$decomposing.carcass[1:96]
sample_data(physeq)$white.spruce <- soil.data$white.spruce[1:96]
sample_data(physeq)$paper.birch <- soil.data$paper.birch[1:96]
sample_data(physeq)$dwarf.birch <- soil.data$dwarf.birch[1:96]
sample_data(physeq)$moss <- soil.data$moss[1:96]
sample_data(physeq)$vaccinium <- soil.data$vaccinium[1:96]
sample_data(physeq)$fern <- soil.data$fern[1:96]
sample_data(physeq)$kinnikinnick <- soil.data$kinnikinnick[1:96]
sample_data(physeq)$heather <- soil.data$heather[1:96]
sample_data(physeq)$eriophorum <- soil.data$eriophorum[1:96]
sample_data(physeq)$fireweed <- soil.data$fireweed[1:96]
sample_data(physeq)$horsetail <- soil.data$horsetail[1:96]
sample_data(physeq)$alder <- soil.data$alder[1:96]
sample_data(physeq)$hogsweed <- soil.data$hogsweed[1:96]
sample_data(physeq)$willow <- soil.data$willow[1:96]
sample_data(physeq)$redberry <- soil.data$redberry[1:96]
sample_data(physeq)$azalea <- soil.data$azalea[1:96]
sample_data(physeq)$slope <- soil.data$slope[1:96]

################################
# ALPHA DIVERSITY BY C:N # 
################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq_nobog, Stream=="Hansen")
physeq_nobog <- subset_samples(physeq, Transect != "Bog")
physeq_hansen_close <- subset_samples(physeq_hansen, Distance < 7)
physeq_hansen_left <- subset_samples(physeq_hansen, Side=="SL")
physeq_hansen_left_close <- subset_samples(physeq_hansen, Side=="SL" & Distance < 7)
physeq_hansen_right <- subset_samples(physeq_hansen, Side=="SR")
physeq_happy <- subset_samples(physeq, Stream == "Happy")
physeq_yako <- subset_samples(physeq, Stream == "Yako")

# plot Hansen stream specific alpha diversity metrics (separately due to space) by C.N
p <- plot_richness(physeq_hansen, color="organic.soil.C.N", x="organic.soil.C.N", measures=c("Shannon"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = organic.soil.C.N, y = value, group=organic.soil.C.N, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(organic.soil.C.N, value, group=organic.soil.C.N,  fill="lightblue", fill=organic.soil.C.N)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq), alpha.diversity)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Observed)
hist(log(data$Shannon))

# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Observed)
shapiro.test(data$Shannon)

# for all data, observed is normally distributed, but Shannon is not

# transformations if needed
data$Observed <- log(data$Observed)
data$Shannon <- log(data$Shannon)

############################
# OVERALL RICHNESS # 
############################

# remove bog sample (because of NA values) and subset by Hansen
physeq_nobog <- subset_samples(physeq, Transect != "Bog")

# for observed if normal, logistic regression model because it is count data
alpha.diversity <- estimate_richness(physeq_nobog, measures=c("Observed","Shannon", "Simpson"))
data <- cbind(sample_data(physeq_nobog), alpha.diversity)
options(na.action = "na.fail")
all.parms <- glm.nb(Observed ~ ln(organic.soil.C.N) + ln(organic.soil.GWC) + ln(Distance) + ln(ph), data=data)
all.parms <- lm(Observed ~ ln(organic.soil.C.N) + ln(organic.soil.GWC) + ln(Distance) + ln(ph), data=data)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod.nb <- glm.nb(Observed ~ ln(Distance) + ln(ph), data=data)
mod.lm <- lm(Observed ~ ln(Distance) + ln(ph), data=data)
summary(mod.lm)

# with other variables, there was no effect of C:N (strongly positive effect of ph)

mod1a <- glm(Observed ~ organic.soil.C.N:Stream, data=data, family="poisson")
mod1b <- glm.nb(Observed ~ organic.soil.C.N:Stream, data=data)
AIC(mod1a, mod1b)
summary(mod1a)
summary(mod1b)

# overall, species richness decreased with C:N (result with just C:N as variable)

####################################
# SPECIES RICHNESS AT HANSEN #
####################################

physeq_hansen <- subset_samples(physeq_nobog, Stream=="Hansen")

alpha.diversity <- estimate_richness(physeq_hansen, measures=c("Observed","Shannon", "Simpson"))
data <- cbind(sample_data(physeq_hansen), alpha.diversity)

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(Observed ~ ln(organic.soil.C.N) + ln(organic.soil.GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=data)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(Observed ~ ln(organic.soil.C.N) + ln(ph), data=data)
summary(mod)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(Observed ~ ln(organic.soil.C.N) + ln(organic.soil.GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=data)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(Observed ~ ln(organic.soil.C.N) + ln(ph), data=data)
summary(mod)

# at Hansen, overall species richness increased with C:N

# running models with just C:N and side
mod1a <- glm(Observed ~ organic.soil.C.N:Side, data=data, family="poisson")
mod1b <- glm.nb(Observed ~ organic.soil.C.N:Side, data=data)
AIC(mod1a, mod1b)
summary(mod1a)
summary(mod1b)

# no effect (with only C:N variable)

##############################
## RICHNESS BY GUILD ##
##############################

exploration.guild.RA$ph <- as.numeric(exploration.guild.RA$ph)

exploration.guild.RA$carcass.deposition[which(exploration.guild.RA$Transect=="S1" & exploration.guild.RA$Distance==1 & 
                                                exploration.guild.RA$Side=="SL")] <- "yes"
exploration.guild.RA$carcass.deposition[which(exploration.guild.RA$Transect=="S4" & exploration.guild.RA$Distance==1 & 
                                                exploration.guild.RA$Side=="SL")] <- "yes"
exploration.guild.RA$carcass.deposition[which(exploration.guild.RA$Transect=="S6" & exploration.guild.RA$Distance==1 & 
                                                exploration.guild.RA$Side=="SL")] <- "yes"

exploration.guild.RA.nobog <- exploration.guild.RA[which(exploration.guild.RA$Transect != "Bog"),]

# now examine by trophic guild
######################################################
# trophic guild richness by C:N with all data
######################################################

# examine data by stream
ggplot(exploration.guild.RA,aes(x=saprotroph.richness, y=Stream)) + geom_boxplot(alpha=0.2) + stat_summary(fun=mean, geom="point")

# saprotrophs # 

# run linear model with all parameters, select best model using AIC and examine it
all.parms <- lm(saprotroph.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

step.model.lmer <- lmer(formula = saprotroph.richness ~ mineral.soil.C.N + GWC + paper.birch + eriophorum + alder + hogsweed + azalea + (1|Stream:Transect), data = exploration.guild.RA)
summary(step.model.lmer)

# saprotrophic richness increases with GWC and decreased with mineral soil C:N
# (without plants, increases with decomposing carcass and decreases with mineral soil C:N)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(saprotroph.richness ~ ln(C.N) + ln(GWC) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(saprotroph.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog) 
summary(mod)

# saprotrophic richness does not vary with C:N with all data

mod1a <- glm(saprotroph.richness ~ C.N:Stream, data=exploration.guild.RA, family="poisson") 
mod1b <- glm.nb(saprotroph.richness ~ C.N:Stream, data=exploration.guild.RA) 
summary(mod1b)

# symbiotrophs #

# run linear model with all parameters, select best model using AIC and examine it
all.parms <- lm(symbiotroph.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + slope + ln(Distance) + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea +
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

step.model.lmer <- lmer(formula = symbiotroph.richness ~ ln(Distance) + paper.birch + vaccinium + heather + eriophorum + horsetail + alder + hogsweed + azalea + carcass.deposition + 
                          (1|Stream:Transect), data = exploration.guild.RA)
summary(step.model.lmer)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(saprotroph.richness ~ ln(C.N) + ln(GWC) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(saprotroph.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog) 
summary(mod)

# symbiotrophic richness does not vary with C:N with all data

mod2a <- glm(symbiotroph.richness ~ C.N:Stream, data=exploration.guild.RA, family="poisson") 
mod2b <- glm.nb(symbiotroph.richness ~ C.N:Stream, data=exploration.guild.RA) 
AIC(mod2a, mod2b)
summary(mod2a)
summary(mod2b)

#overall, saprotrophic richness decreased with C:N and symbiotrophic richness increased with C:N (result with using just C:N variable)

######################################################
# trophic guild richness by C:N at Hansen
######################################################

hansen <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Hansen"),]

# saprotrophs # 

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(saprotroph.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(saprotroph.richness ~ ln(C.N) + ln(GWC) + ln(ph), data=hansen)
summary(mod)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(saprotroph.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(saprotroph.richness ~ ln(GWC) + ln(ph), data=hansen)
summary(mod)

# saprotrophic richness did not vary with C:N at Hansen

mod1a <- glm(saprotroph.richness ~ C.N:Side, data=hansen, family="poisson") 
mod1b <- glm.nb(saprotroph.richness ~ C.N:Side, data=hansen) 
mod1c <- lm(saprotroph.richness ~ C.N:Side, data=hansen) 
AIC(mod1a, mod1b,)
summary(mod1a)
summary(mod1b)
summary(mod1c)

# saprotrophic richness decreases with C:N on right bank, but not left bank (result with only C:N as variable? but can't seem to replicate it)

# symbiotrophs # 

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(symbiotroph.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(symbiotroph.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=hansen)
summary(mod)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(symbiotroph.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(symbiotroph.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=hansen)
summary(mod)

# symbiotrophic richness increased with C:N at Hansen

mod1 <- lm(symbiotroph.richness ~ C.N:Side, data=hansen) 
mod2 <- glm(symbiotroph.richness ~ C.N:Side, data=hansen, family="poisson") 
mod3 <- glm.nb(symbiotroph.richness ~ C.N:Side, data=hansen) 
summary(mod1)
summary(mod2)
summary(mod3)

# symbiotrophic richness increases with C:N on both banks! (old result)

########################################
# overall by exploration type
#############################################

# long-distance #

# run linear model with all parameters, select best model using AIC and examine it
all.parms <- lm(long.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

step.model.lmer <- lmer(formula = long.richness ~ organic.soil.C.N + paper.birch + vaccinium + heather + hogsweed + azalea + (1|Stream:Transect), data = exploration.guild.RA)
summary(step.model.lmer)

# increased with organic soil C:N

# same model as above but just Hansen data
all.parms <- lm(long.richness ~ C.N + GWC + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + azalea + slope + Distance + 
                  carcass.deposition + decomposing.carcass, data=hansen)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(long.richness ~ ln(organic.soil.C.N) + ln(GWC) + carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(long.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog) 
summary(mod)

# long richness MAYBE increased with C.N for all data (p=0.08 for lm model)

mod1a <- glm(long.richness ~ C.N:Stream, data=exploration.guild.RA, family="poisson") 
mod1b <- glm.nb(long.richness ~ C.N:Stream, data=exploration.guild.RA) 
summary(mod1a)

# long distance richness decreased with C:N at Hansen and Yako (result with just C:N as variable)

# short-distance #

# run linear model with all parameters, select best model using AIC and examine it
all.parms <- lm(short.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# short richness decreased with mineral soil C:N
# without plants, short richness decreased with slope

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(short.richness ~ ln(C.N) + ln(GWC) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(short.richness ~ ln(C.N) + ln(Distance) + ln(ph), data=exploration.guild.RA.nobog) 
summary(mod)

# short distance did not vary with C:N for all data

mod2a <- glm(short.richness ~ C.N:Stream, data=exploration.guild.RA, family="poisson") 
mod2b <- glm.nb(short.richness ~ C.N:Stream, data=exploration.guild.RA) 
AIC(mod2a, mod2b)
summary(mod2a)
summary(mod2b)

all.parms <- lm(medium.fringe.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# medium fringe richness increased with carcass deposition and distance

all.parms <- lm(medium.smooth.richness ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

step.model.lmer <- lmer(formula = medium.smooth.richness ~ vaccinium + heather + eriophorum + horsetail + hogsweed + redberry + azalea + carcass.deposition + (1|Stream:Transect), data = exploration.guild.RA)
summary(step.model.lmer)

# medium smooth richness increased with carcass deposition (almost significant)

#############################################
# exploration type at Hansen # 
#############################################

# long distance # 

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(long.richness ~ ln(C.N):Side + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(long.richness ~ ln(C.N):Side + ln(GWC) + ln(ph) + Side, data=hansen)
summary(mod)

# long distance species richness increased with C:N on the left bank, but not on the right bank

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(long.richness ~ ln(C.N):Side + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(long.richness ~ ln(C.N):Side + ln(GWC) + ln(ph), data=hansen)
summary(mod)

# no effect of C:N

mod1 <- glm(long.richness ~ C.N:Side, data=hansen, family="poisson") 
mod2 <- glm.nb(long.richness ~ C.N:Side, data=hansen) 
summary(mod1)
summary(mod2)

# long distance richness increased with C:N at Hansen (old results with only C:N)

# short distance # 

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(short.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(short.richness ~ ln(C.N) + ln(ph), data=hansen)
summary(mod)

# run negative binomial model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- glm.nb(short.richness ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- glm.nb(short.richness ~ ln(C.N) + ln(ph), data=hansen)
summary(mod)

# short richness did not vary with C:N

mod1 <- glm(short.richness ~ C.N:Side, data=hansen, family="poisson") 
mod2 <- glm.nb(short.richness ~ C.N:Side, data=hansen) 
AIC(mod1, mod2)
summary(mod1)
summary(mod2)

# short distance species richness increased with C:N on the left bank, but not on the right bank (old results with only C:N)

########################
# SHANNON DIVERSITY # 
#######################

# overall
alpha.diversity <- estimate_richness(physeq_nobog, measures=c("Observed","Shannon", "Simpson"))
data <- cbind(sample_data(physeq_nobog), alpha.diversity)
options(na.action = "na.fail")
all.parms <- lm(Shannon ~ ln(organic.soil.C.N) + ln(organic.soil.GWC) + ln(Distance) + ln(ph), data=data)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod.lm <- lm(Shannon ~ ln(organic.soil.C.N) + ln(ph), data=data)
summary(mod.lm)

# no effect of C:N on overall diversity

mod1 <- lm(Shannon ~ organic.soil.C.N:Stream, data=data)
summary(mod1)

# no effect (old results)

# at hansen
alpha.diversity <- estimate_richness(physeq_hansen, measures=c("Observed","Shannon", "Simpson"))
data <- cbind(sample_data(physeq_hansen), alpha.diversity)
options(na.action = "na.fail")
all.parms <- lm(Shannon ~ ln(organic.soil.C.N):Side + ln(organic.soil.GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=data)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod.lm <- lm(Shannon ~ ln(organic.soil.C.N):Side + Side + ln(ph), data=data)
summary(mod.lm)

# species diversity increased with C:N at Hansen on both sides, but increased more strongly on the left bank

mod1 <- lm(Shannon ~ organic.soil.C.N:Side, data=data)
summary(mod1)

# no effect (old results)

####################
# DIVERSITY BY GUILD OVERALL #
######################

# trophic guild diversity by C:N with all data

# remove one point with no value for saprotroph diversity
exploration.guild.RA.sap.div <- exploration.guild.RA %>% drop_na(saprotroph.diversity)

# saprotroph diversity # 
all.parms <- lm(saprotroph.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA.sap.div)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# saprotroph diversity increased with decomposing carcass and organic soil C:N
# (without plants, increased with decomposing carcass)

mod1 <- lm(saprotroph.diversity ~ C.N:Stream, data=exploration.guild.RA)
summary(mod1)

# symbiotroph diversity # 
all.parms <- lm(symbiotroph.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# symbiotroph diversity decreased with mineral soil C:N
# without plants, decreased with carcass deposition!!

mod2 <- lm(symbiotroph.diversity ~ C.N:Stream, data=exploration.guild.RA)
summary(mod1)
summary(mod2)

##############################################
# trophic guild diversity at Hansen # 
##############################################

hansen <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Hansen"),]

# remove one point with no value for saprotroph diversity
hansen.sap.div <- hansen %>% drop_na(saprotroph.diversity)

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(saprotroph.diversity ~ ln(C.N):Side + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen.sap.div)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(saprotroph.diversity ~ ln(GWC) + ln(ph), data=hansen.sap.div)
summary(mod)

# at Hansen, saprotrophic diversity does not vary with C:N

mod1 <- lm(saprotroph.diversity ~ C.N:Side, data=hansen.sap.div)
mod2 <- mblm(saprotroph.diversity ~ C.N, data=hansen.sap.div)
summary(mod1)
summary(mod2)

# symbiotroph diversity with C:N at Hansen 

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(symbiotroph.diversity ~ ln(C.N) + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(symbiotroph.diversity ~ ln(C.N) + ln(Distance), data=hansen)
summary(mod)

# symbiotrophc diversity did not vary with C:N at Hansen

hansen.left <- exploration.guild.RA[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL"),]
hansen.right <- exploration.guild.RA[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR"),]

# symbiotroph diversity is normally distributed
shapiro.test(hansen.left$symbiotroph.diversity)

mod1 <- lm(symbiotroph.diversity ~ C.N:Side, data=hansen)
mod2 <- mblm(symbiotroph.diversity ~ C.N, data=hansen)
summary(mod1)
summary(mod2)

mod1 <- lm(symbiotroph.diversity ~ C.N, data=hansen.left)
mod2 <- mblm(symbiotroph.diversity ~ C.N, data=hansen.left)
summary(mod1)
summary(mod2)

mod1 <- lm(symbiotroph.diversity ~ C.N, data=hansen.right)
mod2 <- mblm(symbiotroph.diversity ~ C.N, data=hansen.right)
summary(mod1)
summary(mod2)

#overall, saprotroph diversity decreased wtih C:N at Hansen (old results)
# maybe symbiotrophc diversity increased with C:N on right bank of Hansen (old results)

####################
# DIVERSITY BY GUILD OVERALL #
######################

# remove one point with no value for long diversity
exploration.guild.RA.long.div <- exploration.guild.RA %>% drop_na(long.diversity)

# exploration type diversity by C:N with all data

# long diversity # 
all.parms <- lm(long.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA.long.div)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# long diversity decreased with paper birch and moss (without plants, long diversity decreased with C:N )
# without plants, long diversity decreased with carcass deposition (OLD MODEL)

mod1 <- lm(long.diversity ~ C.N:Stream, data=exploration.guild.RA)
summary(mod1)

# remove one point with no value for long diversity
exploration.guild.RA.short.div <- exploration.guild.RA %>% drop_na(short.diversity)

# short diversity # 
all.parms <- lm(short.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA.short.div)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# short diversity decreased with mineral soil C:N and carcass deposition
# without plants, decreased with carcass deposition

mod2 <- lm(short.diversity ~ C.N:Stream, data=exploration.guild.RA)
summary(mod1)
summary(mod2)

# medium diversity #

exploration.guild.RA.medium.div <- exploration.guild.RA %>% drop_na(medium.fringe.diversity)

all.parms <- lm(medium.fringe.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA.medium.div)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# medium diversity increased with mineral soil C:N and decomposing carcass
# (without plants, medium diversity increased with decomposing carcass and decreased with C:N)

exploration.guild.RA.medium.div <- exploration.guild.RA %>% drop_na(medium.smooth.diversity)

all.parms <- lm(medium.smooth.diversity ~ organic.soil.C.N + mineral.soil.C.N + GWC + white.spruce + paper.birch + dwarf.birch + moss + vaccinium + fern + kinnikinnick + 
                  heather + eriophorum + fireweed + horsetail + alder + hogsweed + willow + redberry + azalea + slope + ln(Distance) + 
                  carcass.deposition + decomposing.carcass, data=exploration.guild.RA.medium.div)
step.model <- stepAIC(all.parms, direction = "both", trace = FALSE)
summary(step.model)

# exploration richness by C:N with all data at Hansen

# long diversity

# remove one point with no value for saprotroph diversity
hansen.long.div <- hansen %>% drop_na(long.diversity)

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(long.diversity ~ ln(C.N):Side + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen.long.div)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(long.diversity ~ ln(C.N):Side + ln(Distance) + ln(ph) + Side, data=hansen.long.div)
summary(mod)

# long diversity only increases with C:N on the right bank

mod1 <- lm(long.diversity ~ C.N:Side, data=hansen)

# shor diversity

# run linear model with all parameters, select best model using AIC and examine it
options(na.action = "na.fail")
all.parms <- lm(short.diversity ~ ln(C.N):Side + ln(GWC) + Side + ln(Distance) + Side:ln(Distance) + ln(ph), data=hansen)
summary(all.parms)
results<-dredge(all.parms)
subset(results, delta <2)
mod <- lm(long.diversity ~ Side + ln(GWC), data=hansen.long.div)
summary(mod)

# short diversity does not vary with C:N at Hansen 

mod2 <- lm(short.diversity ~ C.N:Side, data=hansen)
summary(mod1)
summary(mod2)

# no effect








# model with full model structure and dredge
options(na.action = "na.fail")
all.parms <- lm(log(Observed) ~ organic.soil.C.N, data=data)
results<-dredge(all.parms)
subset(results, delta <2)

mod2 <- lm(log(Observed) ~ ln(Distance) + Side + Transect, data=data)
mod3 <- lm(log(Observed) ~ C.N + Side, data=data)
mod4 <- lm(log(Observed) ~ C.N + Side, data=data)
mod5 <- lm(log(Observed) ~ C.N + Side + Side:C.N, data=data)
mod6 <- lm(log(Observed) ~ C.N + Transect + C.N:Transect, data=data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
summary(mod3)
plot(mod1)

# model with full model structure
mod1 <- lm(Shannon ~ Side, data=data)
mod2 <- lm(Shannon ~ C.N, data=data)
mod3 <- lm(Shannon ~ C.N + Side, data=data)
mod4 <- lm(Shannon ~ C.N + Side, data=data)
mod5 <- lm(Shannon ~ C.N + Side + Side:C.N, data=data)
mod6 <- lm(Shannon ~ C.N + Transect, data=data)
AIC(mod1, mod2, mod3, mod4, mod5, mod6)
summary(mod6)
plot(mod1)

# visualize diversity increasing with C:N
pred_interval <- seq(11,40,1)
example <- data.frame(C.N=pred_interval)
pred1 <- predict(mod2, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$C.N <- seq(11,40,1)

ggplot(NULL, aes(x,y)) + 
  geom_boxplot(data = p$data, aes(x = C.N, y = value, group=C.N, color = NULL), alpha = 0.1)+
  geom_line(data=pred1, aes(x=C.N, y=fit)) + 
  geom_line(data=pred1, aes(x=C.N, y=fit+0.05*upr), linetype="dotted")+
  geom_line(data=pred1, aes(x=C.N, y=fit-0.05*upr), linetype="dotted")+ 
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
 fill = NA, linewidth = 1.2)) + labs(y = "Shannon Diversity", x="C.N")+ylim(c(1.5,4))


# filter phyloseq object by right or left side of Hansen stream
physeq_hansen_right <- subset_samples(physeq_hansen, Side=="SR")
physeq_hansen_left <- subset_samples(physeq_hansen, Side=="SL")

# plot Hansen stream specific alpha diversity metrics (separately due to space) by C.N
p <- plot_richness(physeq_hansen_left, color="C.N", x="C.N", measures=c("Shannon"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = C.N, y = value, group=C.N, color = NULL), alpha = 0.1)

# put C.N 5 with 6 for consistency (only for right bank!)
p$data$C.N[17] <- 6

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(C.N, value, group=C.N,  fill="lightblue", fill=C.N)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", 
  position=position_dodge(width=0.95), width=0.3) + ylab("Shannon Diversity") + theme_bw()+
  scale_y_continuous(limits=c(0,4),expand = c(0, 0)) + theme( strip.text.x = element_blank(),
                                                              legend.position="none") + xlab("C.N (m)")

# boxplot with model prediction
pred_interval <- c(1,3,6,10,20,40,60,100)
example <- data.frame(C.N=pred_interval)
pred1 <- predict(mod6, newdata=example, re.form=NA, type="response", interval='confidence')
pred1 <- as.data.frame(pred1)
pred1$C.N <- c(1,3,6,10,20,40,60,100)

ggplot(NULL, aes(x,y)) + 
  geom_boxplot(data = p$data, aes(x = C.N, y = value, group=C.N, color = NULL), alpha = 0.1)+
  geom_line(data=pred1, aes(x=C.N, y=fit)) + 
  geom_line(data=pred1, aes(x=C.N, y=fit+0.1*upr), linetype="dotted")+
  geom_line(data=pred1, aes(x=C.N, y=fit-0.1*upr), linetype="dotted")+ 
  theme(panel.background = element_blank(),panel.border = element_rect(colour = "black", 
                                                                       fill = NA, linewidth = 1.2)) + labs(y = "Shannon Diversity", x="C.N")
